#Load libraries
#General
library(deconvSeq)
library(data.table)
library(plyr)
library(dplyr)
library(tidyverse)
library(ggplot2)
library(DESeq2)
library(matrixStats)
library(ComplexUpset)
library("RColorBrewer")
library(treemapify)
library(ggtree)
library(cowplot)
library(pheatmap)
library(MDSeq)
library(matrixStats)
#Annotation
organism="org.Mm.eg.db"
library(organism, character.only = TRUE)
library("genomation")
library("GenomicRanges")
library("biomaRt")
#GSEA and ORA
library(clusterProfiler)
library(enrichplot)
library(ggnewscale)
library(rrvgo)
#Get gene mappings and biotypes
httr::set_config(httr::config(ssl_verifypeer = FALSE))
mart <- useMart('ENSEMBL_MART_ENSEMBL')
mart <- useDataset('mmusculus_gene_ensembl', mart)
annotLookup <- getBM(
mart = mart,
attributes = c(
'ensembl_gene_id',
'ensembl_transcript_id',
'external_gene_name',
'gene_biotype',
'entrezgene_id'),
uniqueRows = TRUE)
#Read in transcript and CpG island annotations
gene.obj=readTranscriptFeatures("/home/jupyter/MOUNT/references/mm10.GRCm38.96.bed", up.flank = 2000,down.flank = 2000, remove.unusual=TRUE, unique.prom = TRUE)
cpg.obj=readFeatureFlank("/home/jupyter/MOUNT/references/cpgi.mm10.bed",feature.flank.name=c("CpGi","shores"), remove.unusual = TRUE)
bedfile<-fread("/home/jupyter/MOUNT/references/mm10.GRCm38.96.bed",header=F)
bedfile<-subset(bedfile,select=c("V1","V4","V6","V2","V3"))
colnames(bedfile)<-c("chr","feature.name","transStrand","transStart","transStop")
write.table(subset(as.data.frame(gene.obj$introns),score==1),"/home/jupyter/MOUNT/references/mm10.firstintrons.tsv",sep="\t",quote=FALSE,row.names=FALSE)
write.table(subset(as.data.frame(gene.obj$exons),score==1),"/home/jupyter/MOUNT/references/mm10.firstexons.tsv",sep="\t",quote=FALSE,row.names=FALSE)
## Function for annotating methylation matrix
annotateMeth <- function(data){
gr <- makeGRangesFromDataFrame(data, keep.extra.columns=FALSE,
start.field="start",
end.field="end")
#CpG island/shore/other annotation
cpgAnn=annotateWithFeatureFlank(gr,
cpg.obj$CpGi,cpg.obj$shores,feature.name="CpGi",flank.name="shores")
cpgMat<-as.data.frame(getMembers(cpgAnn))
cpgMat$CpG<-ifelse(cpgMat$CpGi==1,"Island",ifelse(cpgMat$shores==1,"Shore","Other"))
data<-cbind(data,cpgMat)
#Gene annotation
geneAnn=annotateWithGeneParts(gr,gene.obj)
geneMat<-cbind(getAssociationWithTSS(geneAnn), as.data.frame(getMembers(geneAnn)))
data<-cbind(data,geneMat)
data<-merge(data,bedfile,by=c("chr","feature.name"))
## Uncomment for adding repeats and enhancer annotations; can also be modified to add first exons/introns
# #Repeats annotation
# repeatsMat<-as.data.frame(getMembers(annotateWithFeature(gr, readGeneric("/home/jupyter/MOUNT/references/mm10.repeats.tsv",
# header=TRUE,meta.cols=list(RepFamily=5)),feature.name="RepFamily")))
# colnames(repeatsMat)<-c("Repeats")
# #Enhancer annotation
# enhMat<-as.data.frame(getMembers(annotateWithFeature(gr, readGeneric("/home/jupyter/MOUNT/references/mm10.enhancers.tsv",
# header=TRUE,meta.cols=list(Enhancer=4)),feature.name="Enhancer")))
# colnames(enhMat)<-c("Enhancer")
# data<-cbind(data,repeatsMat,enhMat)
#Workaround for a bug in genomation (https://github.com/BIMSBbioinfo/genomation/issues/203)
data$intron<-ifelse((data$start<=data$transStart & data$transStrand=="+"),0,
ifelse((data$start>=data$transStop & data$transStrand=="-"),0,data$intron))
data$Region<-ifelse(data$prom==1,"Promoter",
ifelse(data$exon==1, "Exon",
ifelse(data$intron==1,"Intron","Intergenic")))
#Filter on genomic context and significance
data<-subset(data,Region!="Intergenic")
data<-merge(data,annotLookup,by.x=c("feature.name"),by.y=c("ensembl_transcript_id"))
data
}
## Function for upset plot
createUpsetPlot <- function(data) {
df<-reshape2::dcast(data, gene~Type, length)
Groups = colnames(df)[2:3]
plotUpset<-upset(df, Groups, name='Groups', width_ratio=0.1, min_size=0, min_degree=1, set_sizes=FALSE,
queries=list(
upset_query(set='Control', fill='gray'),
upset_query(set='IR', fill='magenta')
),
themes=upset_default_themes(text=element_text(size=24,face="bold"),
axis.title=element_text(face="bold",size=26),
panel.border = element_rect(colour = "black", fill=NA, size=1)),
sort_intersections_by='cardinality',
base_annotations=list('Size'=(intersection_size(counts=TRUE))),
matrix=intersection_matrix(geom=geom_point(shape='square filled',size=5)))
plotUpset
}
#Sample manifest
manifest<-fread("/home/jupyter/MOUNT/integrated/manifest.txt",header=T,sep="\t")
manifest<- manifest %>% dplyr::filter(Brain_rna!="Mmus_C57_6J_BRN_HLU_IRC_4mon_Rep5_M93") %>%
dplyr::filter(Tissue=="Both" & Group %in% c("IR","Control"))
manifest[1:2,]
| Subject | Rep | Tissue | Sample | Timepoint | Group | Brain_meth | Retina_meth | Brain_rna | Retina_rna | Count | Brain_age | Retina_age |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <int> | <dbl> | <dbl> |
| M83 | Rep1 | Both | HLLC_IRC_4mon_Rep1_M83 | 4m | Control | CFG1778 | CFG1837 | Mmus_C57_6J_BRN_HLLC_IRC_4mon_Rep1_M83 | Mmus_C57_6J_RTN_HLLC_IRC_4mon_Rep1_M83 | 4 | 13.3 | 3.4 |
| M85 | Rep2 | Both | HLLC_IRC_4mon_Rep2_M85 | 4m | Control | CFG1780 | CFG1839 | Mmus_C57_6J_BRN_HLLC_IRC_4mon_Rep2_M85 | Mmus_C57_6J_RTN_HLLC_IRC_4mon_Rep2_M85 | 4 | 14.1 | NA |
#Process and merge brain methylation data
samples_202=manifest$Brain_meth
files_202=paste("GLDS-202_Epigenomics_R1_",samples_202,
"_trimmed.fq_trimmed_bismark_bt2.bismark.cov.gz",sep="")
setwd("/home/jupyter/MOUNT/integrated/bismark/glds202")
methmat_202_all = getmethmat(filnames=files_202, sample.id=samples_202, filtype="bismarkCoverage")
methmat_202_all<-annotateMeth(methmat_202_all)
#Process and merge retina methylation data
samples_203=manifest$Retina_meth
files_203=paste("GLDS-203_Epigenomics_R1_",samples_203,
"_trimmed.fq_trimmed_bismark_bt2.bismark.cov.gz",sep="")
setwd("/home/jupyter/MOUNT/integrated/bismark/glds203")
methmat_203_all = getmethmat(filnames=files_203, sample.id=samples_203, filtype="bismarkCoverage")
methmat_203_all<-annotateMeth(methmat_203_all)
colnames(methmat_202_all)
#Only keep sites with >=10 reads in all samples
selectedCols <- methmat_202_all[grep("coverage", names(methmat_202_all), fixed = T)]
methmat_202_all <- methmat_202_all[apply(selectedCols, 1,
function(x) all(!is.na(x) & x>=10)), ]
selectedCols <- methmat_203_all[grep("coverage", names(methmat_203_all), fixed = T)]
methmat_203_all <- methmat_203_all[apply(selectedCols, 1,
function(x) all(!is.na(x) & x>=10)), ]
colMeans(methmat_202_all[grep("coverage", names(methmat_202_all), fixed = T)])
colMeans(methmat_203_all[grep("coverage", names(methmat_203_all), fixed = T)])
dim(methmat_202_all)
#Calculate methylation % for all sites/samples
methmat_202<-methmat_202_all
methmat_203<-methmat_203_all
for (i in 1:length(samples_202)){
numCs <- rlang::sym(paste("numCs", i, sep = ""))
coverage <- rlang::sym(paste("coverage", i, sep = ""))
methmat_202<-methmat_202 %>%
dplyr::mutate(!!paste0("ratio_",manifest$Brain_meth[i]) := !!numCs/!!coverage)
methmat_203<-methmat_203 %>%
dplyr::mutate(!!paste0("ratio_",manifest$Retina_meth[i]) := !!numCs/!!coverage)
}
#Calculate ratios (per site) and filter sites with sd<0.02
methmat_202 <- methmat_202 %>% mutate(mean = select(., starts_with("ratio_")) %>% rowMeans(na.rm = TRUE),
sd = rowSds(as.matrix(select(.,starts_with("ratio_"))),na.rm = TRUE)) %>%
filter(sd > 0.02)
methmat_203 <- methmat_203 %>% mutate(mean = select(., starts_with("ratio_")) %>% rowMeans(na.rm = TRUE),
sd = rowSds(as.matrix(select(.,starts_with("ratio_"))),na.rm = TRUE)) %>%
filter(sd > 0.02)
methmat_202[1,]
| feature.name | chr | start | end | strand | coverage1 | numCs1 | numTs1 | coverage2 | numCs2 | ⋯ | ratio_CFG1780 | ratio_CFG1771 | ratio_CFG1775 | ratio_CFG1777 | ratio_CFG1779 | ratio_CFG1783 | ratio_CFG1788 | ratio_CFG1789 | mean | sd | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <chr> | <int> | <int> | <chr> | <int> | <int> | <int> | <int> | <int> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
| 1 | ENSMUST00000000001 | chr3 | 108146074 | 108146074 | * | 28 | 0 | 28 | 11 | 0 | ⋯ | 0 | 0 | 0 | 0.04347826 | 0.02352941 | 0 | 0 | 0.05555556 | 0.01361814 | 0.02196966 |
## Uncomment if only degree of hypermethylation needs to be evaluated
#for (i in 1:length(samples_202)){
# numCs <- rlang::sym(paste("numCs", i, sep = ""))
# coverage <- rlang::sym(paste("coverage", i, sep = ""))
# methmat_202<-methmat_202 %>% dplyr::mutate(!!coverage := ifelse(!!sym(numCs)==0,NA,!!sym(coverage)), !!numCs := ifelse(!!sym(numCs)==0,NA,!!sym(numCs)))
# methmat_203<-methmat_203 %>% dplyr::mutate(!!coverage := ifelse(!!sym(numCs)==0,NA,!!sym(coverage)), !!numCs := ifelse(!!sym(numCs)==0,NA,!!sym(numCs)))
#}
#Calculate weighted methylation for each gene
methmat_202_counts<- methmat_202 %>% dplyr::filter(Region=="Intron") %>%
dplyr::select(c("external_gene_name","Region",contains(c("numCs","coverage")))) %>%
dplyr::group_by(external_gene_name,Region) %>%
summarise(across(starts_with(c('numCs', 'coverage')), sum, na.rm=TRUE),.groups = 'drop')
methmat_203_counts<- methmat_203 %>% dplyr::filter(Region=="Intron") %>%
dplyr::select(c("external_gene_name","Region",contains(c("numCs","coverage")))) %>%
dplyr::group_by(external_gene_name,Region) %>%
summarise(across(starts_with(c('numCs', 'coverage')), sum, na.rm=TRUE),.groups = 'drop')
for (i in 1:length(samples_202)){
coverage <- rlang::sym(paste("coverage", i, sep = ""))
numCs <- rlang::sym(paste("numCs", i, sep = ""))
methmat_202_counts<-methmat_202_counts %>% mutate(!!paste("ratio",manifest$Brain_meth[i],sep="_") := !!numCs/!!coverage)
methmat_203_counts<-methmat_203_counts %>% mutate(!!paste("ratio",manifest$Retina_meth[i],sep="_") := !!numCs/!!coverage)
}
#Methylation ratio distribution
library(ggpubr)
df_202 <- as.data.frame(gather
(as.data.frame(methmat_202_counts %>% select(c("CpG","Region",contains("ratio_")))),
key='key',value='value',-c('CpG','Region')))
df_202$key<-gsub("ratio_","",df_202$key)
df_203 <- as.data.frame(gather
(as.data.frame(methmat_203_counts %>% select(c("CpG","Region",contains("ratio_")))),
key='key',value='value',-c('CpG','Region')))
df_203$key<-gsub("ratio_","",df_203$key)
df_202<-merge(df_202,manifest,by.x=c("key"),by.y=c("Brain_meth"))
df_202$CpG<-ifelse(df_202$CpG=="Island","CpGi",ifelse(df_202$CpG=="Shore","CpGs","Other"))
df_203<-merge(df_203,manifest,by.x=c("key"),by.y=c("Retina_meth"))
df_203$CpG<-ifelse(df_203$CpG=="Island","CpGi",ifelse(df_203$CpG=="Shore","CpGs","Other"))
options(repr.plot.width=10, repr.plot.height=20)
plot_grid(ggboxplot(subset(df_202), x = "Group", y = "value",
color = "Group", outlier.shape = NA) + stat_compare_means(method="t.test") + facet_wrap(CpG~Region),
ggboxplot(subset(df_203), x = "Group", y = "value",
color = "Group", outlier.shape = NA) + stat_compare_means(method="t.test") + facet_wrap(CpG~Region),nrow=2)
p3<-ggplot(subset(df_202)) + geom_density(aes(color=paste(Group,Subject),x=value,..scaled..),size=1) +
xlab("Methylation level") + ylab("Probability density") +
theme(axis.text = element_text(size = 16), axis.title = element_text(size = 20)) + facet_wrap(CpG~Region)
p4<-ggplot(subset(df_203)) + geom_density(aes(color=paste(Group,Subject),x=value,..scaled..),size=1) +
xlab("Methylation level") + ylab("Probability density") +
theme(axis.text = element_text(size = 16), axis.title = element_text(size = 20)) + facet_wrap(CpG~Region)
options(repr.plot.width=15, repr.plot.height=15)
plot_grid(p3 +
scale_color_manual(values=c("blue","blue","blue","blue","red","red","red","red","red","red"))+
ggtitle("BRAIN") + theme(plot.title = element_text(size=20)),
p4 +
scale_color_manual(values=c("blue","blue","blue","blue","red","red","red","red","red","red"))+
ggtitle("RETINA") + theme(plot.title = element_text(size=20)),
nrow=2)
#Compute PCs
matPC<-subset(methmat_203_counts,Region=="Intron")[,grep("ratio", names(methmat_203_counts))]
matPC[is.na(matPC)] <- 0
dfWide.pca <- prcomp(t(matPC), center = TRUE)
dfWide.pca <- as.data.frame(dfWide.pca$x)
dfWide.pca$ID<-gsub("ratio_","",row.names(dfWide.pca))
dfWide.pca<-merge(dfWide.pca,manifest,by.x=c("ID"),by.y=c("Retina_meth"))
#PCA plots
options(repr.plot.width=5, repr.plot.height=5)
retinaPC<-ggplot(
dfWide.pca,
aes(x = PC1, y = PC2,color=Group,label=Subject)
) + geom_text() +
scale_shape_manual(values=c(17,16)) +
geom_hline(yintercept = 0, colour = "gray65") +
geom_vline(xintercept = 0, colour = "gray65") + theme_classic()
#Compute PCs
matPC<-subset(methmat_202_counts,Region=="Intron")[,grep("ratio", names(methmat_202_counts))]
matPC[is.na(matPC)] <- 0
dfWide.pca <- prcomp(t(matPC), center = TRUE)
dfWide.pca <- as.data.frame(dfWide.pca$x)
dfWide.pca$ID<-gsub("ratio_","",row.names(dfWide.pca))
dfWide.pca<-merge(dfWide.pca,manifest,by.x=c("ID"),by.y=c("Brain_meth"))
#PCA plots
options(repr.plot.width=5, repr.plot.height=5)
brainPC<-ggplot(
dfWide.pca,
aes(x = PC1, y = PC2,color=Group,label=Subject)
) + geom_text() +
scale_shape_manual(values=c(17,16)) +
geom_hline(yintercept = 0, colour = "gray65") +
geom_vline(xintercept = 0, colour = "gray65") + theme_classic()
options(repr.plot.width=15, repr.plot.height=5)
plot_grid(brainPC,retinaPC,nrow=1)
genesCoding<-unique(as.data.frame(subset(annotLookup,
gene_biotype=="protein_coding" | grepl("RNA",gene_biotype),
select=c("ensembl_gene_id"))))
colnames(genesCoding) <-c("gene")
#BRN dataset (GLDS-202)
counts202<-read.delim("/home/jupyter/MOUNT/integrated/GLDS-202_rna_seq_Unnormalized_Counts.csv",sep=",",row.names=1,header=TRUE)
counts202<-counts202 %>% dplyr::filter(!grepl("ERCC",row.names(counts202))) %>%
dplyr::select(manifest$Brain_rna)
genesCounts<-as.data.frame(row.names(counts202))
colnames(genesCounts) <- c("gene")
merged<-merge(genesCounts,genesCoding,by=c("gene"))
counts202 <- counts202[merged$gene,]
coldata202<-as.data.frame(colnames(counts202))
colnames(coldata202)<-c("SampleName")
coldata202$Group<-manifest$Group
dds202 <- DESeqDataSetFromMatrix(countData = round(counts202),colData = coldata202,design =~ 1 )
ddsNofilt202 <- estimateSizeFactors(dds202)
keep <- rowSums(counts(ddsNofilt202, normalized=TRUE) >= 5 ) >= 2 &
rowSums(counts(ddsNofilt202, normalized=TRUE) == 0 ) <= 4
dds202 <- ddsNofilt202[keep,]
counts202<-log2(as.data.frame(counts(dds202, normalized=TRUE))+1)
counts202$ensembl_gene_id<-rownames(counts202)
counts202<-merge(counts202,unique(subset(annotLookup,select=c("ensembl_gene_id","external_gene_name"))),by=c("ensembl_gene_id"))
#RTN dataset (GLDS-203)
counts203<-read.delim("/home/jupyter/MOUNT/integrated/GLDS-203_rna_seq_Unnormalized_Counts.csv",sep=",",row.names=1,header=TRUE)
counts203<-counts203 %>% filter(!grepl("ERCC",row.names(counts203))) %>%
dplyr::select(manifest$Retina_rna)
genesCounts<-as.data.frame(row.names(counts203))
colnames(genesCounts) <-c("gene")
merged<-merge(genesCounts,genesCoding,by=c("gene"))
counts203 <- counts203[merged$gene,]
coldata203<-as.data.frame(colnames(counts203))
colnames(coldata203)<-c("SampleName")
coldata203$Group<-manifest$Group
dds203 <- DESeqDataSetFromMatrix(countData = round(counts203),colData = coldata203,design =~ 1 )
ddsNofilt203 <- estimateSizeFactors(dds203)
keep <- rowSums(counts(ddsNofilt203, normalized=TRUE) >= 5 ) >= 2 &
rowSums(counts(ddsNofilt203, normalized=TRUE) == 0 ) <= 4
dds203 <- ddsNofilt203[keep,]
counts203<-log2(as.data.frame(counts(dds203, normalized=TRUE))+1)
counts203$ensembl_gene_id<-rownames(counts203)
counts203<-merge(counts203,unique(subset(annotLookup,select=c("ensembl_gene_id","external_gene_name"))),by=c("ensembl_gene_id"))
dim(counts202)
dim(counts203)
converting counts to integer mode converting counts to integer mode
Within each exposure group, calculate Pearson correlation for each gene promoter (CpGi/s) using brain-retina matched pairs
#Merge brain and retina data on common genes
mergedMeth<-merge(methmat_202_counts %>% dplyr::select(c("external_gene_name",contains("ratio"))),
methmat_203_counts %>% dplyr::select(c("external_gene_name",contains("ratio"))),
by=c("external_gene_name"))
mergedMeth[is.na(mergedMeth)] <- 0
#Calculate correlation between brain and retina for each gene within each group
for (group in c("IR","Control")){
tmp202<-as.matrix(mergedMeth[,paste0("ratio_",subset(manifest,Group==group)$Brain_meth)])
tmp203<-as.matrix(mergedMeth[,paste0("ratio_",subset(manifest,Group==group)$Retina_meth)])
corMatTmp<-suppressWarnings(sapply(seq.int(dim(tmp202)[1]),
function(i) cor.test(tmp202[i,], tmp203[i,])))
corMatTmp<-as.data.frame(t(corMatTmp)) %>% dplyr::select(statistic,p.value,estimate,conf.int) %>%
rename_with( ~ paste0(.x,".",group))
assign(paste0("corMat_",group),corMatTmp)
}
corMatMeth<-cbind(corMat_IR,corMat_Control)
corMatMeth$gene<-mergedMeth$external_gene_name
#Correlated genes (positive and negative)
posCorMeth<-unique(rbind(as.data.frame(corMatMeth %>% dplyr::filter(p.value.IR<0.05 & estimate.IR!=0)
%>% dplyr::select(gene) %>% mutate(Type="IR")),
as.data.frame(corMatMeth %>% dplyr::filter(p.value.Control<0.05 & estimate.Control>0)
%>% dplyr::select(gene) %>% mutate(Type="Control"))))
posCorMeth$Class<-"RRBS"
mergedRna<-merge(counts202,counts203,by="ensembl_gene_id")
for (group in c("IR","Control")){
tmp202<-as.matrix(t(apply(mergedRna[,subset(manifest,Group==group)$Brain_rna],1,scale)))
tmp203<-as.matrix(t(apply(mergedRna[,subset(manifest,Group==group)$Retina_rna],1,scale)))
corMatTmp<-suppressWarnings(sapply(seq.int(dim(tmp202)[1]),
function(i) cor.test(tmp202[i,], tmp203[i,])))
corMatTmp<-as.data.frame(t(corMatTmp)) %>% dplyr::select(statistic,p.value,estimate,conf.int) %>%
rename_with( ~ paste0(.x,".",group))
assign(paste0("corMatRna_",group),corMatTmp)
}
corMatRna<-cbind(corMatRna_IR,corMatRna_Control)
corMatRna$ensembl_gene_id<-mergedRna$ensembl_gene_id
corMatRna<-merge(corMatRna,unique(subset(annotLookup,select=c("ensembl_gene_id","external_gene_name"))),by=c("ensembl_gene_id"))
corMatRna <- corMatRna %>% rename(gene=external_gene_name)
#Correlated genes (positive and negative)
posCorRna<-unique(rbind(as.data.frame(corMatRna %>% dplyr::filter(p.value.IR<0.05 & estimate.IR!=0)
%>% dplyr::select(gene) %>% mutate(Type="IR")),
as.data.frame(corMatRna %>% dplyr::filter(p.value.Control<0.05 & estimate.Control>0)
%>% dplyr::select(gene) %>% mutate(Type="Control"))))
posCorRna$Class<-"RNA-Seq"
Within each exposure group for each tissue type, calculate Pearson correlation for each gene promoter (CpGi/s) using RRBS-RNA-seq matched pairs
methmat_203_counts[is.na(methmat_203_counts)] <- 0
methmat_202_counts[is.na(methmat_202_counts)] <- 0
merged203<-merge(counts203,methmat_203_counts,by="external_gene_name")
for (group in c("IR","Control")){
tmpMeth<-as.matrix(merged203[,paste0("ratio_",subset(manifest,Group==group)$Retina_meth)])
tmpRna<-as.matrix(t(apply(merged203[,subset(manifest,Group==group)$Retina_rna],1,scale)))
corMat<-suppressWarnings(sapply(seq.int(dim(tmpMeth)[1]),
function(i) cor.test(tmpMeth[i,], tmpRna[i,])))
corMat<-as.data.frame(t(corMat)) %>% dplyr::select(statistic,p.value,estimate,conf.int) %>%
rename_with( ~ paste0(.x,".",group))
assign(paste0("corMatRetina_",group),corMat)
}
merged202<-merge(counts202,methmat_202_counts,by="external_gene_name")
for (group in c("IR","Control")){
samples_rna<-subset(manifest,Group==group)$Brain_rna
samples_meth<-subset(manifest,Group==group)$Brain_meth
tmpMeth<-as.matrix(merged202[,paste0("ratio_",samples_meth)])
tmpRna<-as.matrix(t(apply(merged202[,samples_rna],1,scale)))
corMat<-suppressWarnings(sapply(seq.int(dim(tmpMeth)[1]),
function(i) cor.test(tmpMeth[i,], tmpRna[i,])))
corMat<-as.data.frame(t(corMat)) %>% dplyr::select(statistic,p.value,estimate,conf.int) %>%
rename_with( ~ paste0(.x,".",group))
assign(paste0("corMatBrain_",group),corMat)
}
corMatBrain<-cbind(corMatBrain_IR,corMatBrain_Control)
corMatRetina<-cbind(corMatRetina_IR,corMatRetina_Control)
corMatBrain$gene<-merged202$external_gene_name
corMatRetina$gene<-merged203$external_gene_name
dim(corMatBrain)
dim(corMatRetina)
#Correlated between RRBS and RNA-Seq
posCorRetina<-unique(rbind(as.data.frame(corMatRetina %>% dplyr::filter(p.value.IR<0.05 & estimate.IR!=0)
%>% dplyr::select(gene) %>% mutate(Type="IR")),
as.data.frame(corMatRetina %>% dplyr::filter(p.value.Control<0.05 & estimate.Control>0)
%>% dplyr::select(gene) %>% mutate(Type="Control"))))
posCorRetina$Class<-"RTN"
#Correlated between RRBS and RNA-Seq
posCorBrain<-unique(rbind(as.data.frame(corMatBrain %>% dplyr::filter(p.value.IR<0.05 & estimate.IR!=0)
%>% dplyr::select(gene) %>% mutate(Type="IR")),
as.data.frame(corMatBrain %>% dplyr::filter(p.value.Control<0.05 & estimate.Control>0)
%>% dplyr::select(gene) %>% mutate(Type="Control"))))
posCorBrain$Class<-"BRN"
allCor<-rbind(posCorMeth,posCorRna,posCorBrain,posCorRetina)
allCor$Type<-paste(allCor$Class,allCor$Type,sep=" ")
df<-reshape2::dcast(allCor, gene~Type, length)
df[1,]
#Groups = colnames(df)[2:9]
Groups=c("RTN Control","BRN Control","RNA-Seq Control","RRBS Control","RTN IR","BRN IR","RNA-Seq IR","RRBS IR")
plotUpset2<-upset(df, Groups, name='Groups', width_ratio=0.1, min_size=0, min_degree=1, set_sizes=FALSE, sort_sets=FALSE,
queries=list(
upset_query(set='RNA-Seq Control', fill='lightblue'),
upset_query(set='RNA-Seq IR', fill='orange'),
upset_query(set='RRBS Control', fill='blue'),
upset_query(set='RRBS IR', fill='brown'),
upset_query(set='RTN Control', fill='darkgrey'),
upset_query(set='RTN IR', fill='red'),
upset_query(set='BRN Control', fill='cyan'),
upset_query(set='BRN IR', fill='magenta')
),
themes=upset_default_themes(text=element_text(size=20,face="bold"),
axis.title=element_text(face="bold",size=20),
panel.border = element_rect(colour = "black", fill=NA, size=1)),
#sort_intersections_by='cardinality',
base_annotations=list('Size'=(intersection_size(counts=TRUE))),
matrix=intersection_matrix(geom=geom_point(shape='square filled',size=5)))
options(repr.plot.width=20, repr.plot.height=10)
plotUpset2 + ggtitle("Genes correlated between expression and methylation within each tissue OR between brain and retina within each assay") + theme(plot.title = element_text(size=20))
dfSum = df %>% mutate(sum = select(., !starts_with("gene") & !contains("Control")) %>% rowSums(na.rm = TRUE))
Using Class as value column: use value.var to override.
| gene | BRN Control | BRN IR | RNA-Seq Control | RNA-Seq IR | RRBS Control | RRBS IR | RTN Control | RTN IR | |
|---|---|---|---|---|---|---|---|---|---|
| <chr> | <int> | <int> | <int> | <int> | <int> | <int> | <int> | <int> | |
| 1 | 1110002E22Rik | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
genes<-subset(dfSum, `RTN IR`==1 &
`RTN Control`==0 & `BRN Control`==0 & `RRBS Control`==0 & `RNA-Seq Control`==0,
select=c("gene"))
genes$gene
options(repr.plot.width=12, repr.plot.height=10)
retinaEgo <- enrichGO(gene = as.character(unique(as.vector(genes$gene))),
OrgDb = org.Mm.eg.db,
keyType = 'SYMBOL',
minGSSize = 5,maxGSSize= 500,
#universe = as.character(unique(as.vector(bkgGenes$ensembl_gene_id))),
ont = "ALL",
pAdjustMethod = "BH",
pvalueCutoff = 0.25,
qvalueCutoff = 0.25)
plot_grid(dotplot(brainEgo, split="ONTOLOGY") + facet_grid(ONTOLOGY~., scale="free"),
dotplot(retinaEgo, split="ONTOLOGY") + facet_grid(ONTOLOGY~., scale="free"))
dim(retinaEgo)
dim(brainEgo)
all<-rbind(subset(as.data.frame(retinaEgo) %>% mutate(tissue="RTN"),`p.adjust`<0.25),
subset(as.data.frame(brainEgo) %>% mutate(tissue="BRN"),`p.adjust`<0.25))
ddply(all,.(tissue,ONTOLOGY),nrow)
tmpAll<-ddply(all,.(Description),nrow)
all<-merge(tmpAll,all,by=c("Description"))
| tissue | ONTOLOGY | V1 |
|---|---|---|
| <chr> | <chr> | <int> |
| BRN | BP | 61 |
| BRN | CC | 123 |
| BRN | MF | 27 |
| RTN | BP | 958 |
| RTN | CC | 169 |
| RTN | MF | 258 |
dmls202<-fread("/home/jupyter/MOUNT/integrated/GLDS-202_dml.tsv",header=T) %>% dplyr::filter(Type=="4m_IR")
dmls203<-fread("/home/jupyter/MOUNT/integrated/GLDS-203_dml.tsv",header=T) %>% dplyr::filter(Type=="4m_IR")
dmls202[1,]
dmls203[1,]
ddply(dmls202,.(Region),nrow)
ddply(dmls203,.(Region),nrow)
sort(unique(subset(dmls202,qvalue<0.05 & abs(meth.diff)>5 & Region=="Gene_body" & !grepl("Rik|Gm",external_gene_name))$external_gene_name))
| feature.name | target.row | dist.to.feature | feature.strand | chr | start | end | strand | pvalue | qvalue | meth.diff | prom | exon | intron | Region | Type | ensembl_gene_id | external_gene_name | gene_biotype |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <int> | <int> | <chr> | <chr> | <int> | <int> | <chr> | <dbl> | <dbl> | <dbl> | <int> | <int> | <int> | <chr> | <chr> | <chr> | <chr> | <chr> |
| ENSMUST00000000094 | 5539 | 69 | + | chr2 | 156721137 | 156721137 | * | 0.000100267 | 0.03768793 | -1.290323 | 1 | 0 | 1 | Promoter | 4m_IR | ENSMUSG00000061689 | Dlgap4 | protein_coding |
| chr | feature.name | target.row | dist.to.feature | feature.strand | start | end | strand | pvalue | qvalue | meth.diff | Type | ensembl_gene_id | external_gene_name | gene_biotype | transStrand | transStart | transStop |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <chr> | <int> | <int> | <chr> | <int> | <int> | <chr> | <dbl> | <dbl> | <dbl> | <chr> | <chr> | <chr> | <chr> | <chr> | <int> | <int> |
| chr1 | ENSMUST00000027083 | 17 | 49765 | + | 65361020 | 65361020 | * | 7.813858e-06 | 0.02808006 | -4.595186 | 4m_IR | ENSMUSG00000025946 | Pth2r | protein_coding | + | 65311256 | 65389244 |
| Region | V1 |
|---|---|
| <chr> | <int> |
| Gene_body | 1389 |
| Intergenic | 948 |
| Promoter | 7373 |
Error in FUN(X[[i]], ...): object 'Region' not found Traceback: 1. ddply(dmls203, .(Region), nrow) 2. splitter_d(.data, .variables, drop = .drop) 3. eval.quoted(.variables, data) 4. lapply(exprs, eval, envir = envir, enclos = enclos) 5. FUN(X[[i]], ...) 6. FUN(X[[i]], ...)
tmp<-subset(all,V1==2 & ONTOLOGY=="BP" & tissue=="RTN")
simMatrix <- calculateSimMatrix(tmp$ID,
orgdb="org.Mm.eg.db",
ont="BP",
method="Rel")
scores <- setNames(-log10(tmp$`p.adjust`), tmp$ID)
reducedTerms_Combined <- reduceSimMatrix(simMatrix,
scores,
orgdb="org.Mm.eg.db")
preparing gene to GO mapping data... preparing IC data... Warning message in calculateSimMatrix(tmp$ID, orgdb = "org.Mm.eg.db", ont = "BP", : “Removed 1 terms that were not found in orgdb for BP”
subset(all,V1==2 & ONTOLOGY=="BP" & tissue=="BRN")$Description
subset(all, grepl("radiation|methylation",Description))
subset(all, grepl("Mgmt",geneID))
| Description | V1 | ONTOLOGY | ID | GeneRatio | BgRatio | pvalue | p.adjust | qvalue | geneID | Count | tissue | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <int> | <chr> | <chr> | <chr> | <chr> | <dbl> | <dbl> | <dbl> | <chr> | <int> | <chr> | |
| 492 | histone H3-K4 methylation | 1 | BP | GO:0051568 | 2/214 | 61/29008 | 0.074687099 | 0.2462426 | 0.2032980 | Gata3/Rlf | 2 | RTN |
| 493 | histone H3-K4 monomethylation | 1 | BP | GO:0097692 | 1/214 | 9/29008 | 0.064478193 | 0.2322778 | 0.1917686 | Rlf | 1 | RTN |
| 607 | macromolecule methylation | 1 | BP | GO:0043414 | 6/214 | 314/29008 | 0.029535963 | 0.2115202 | 0.1746311 | Gata3/Icmt/Larp7/Pcif1/Rlf/Trmt5 | 6 | RTN |
| 642 | methylation | 1 | BP | GO:0032259 | 7/214 | 363/29008 | 0.018631267 | 0.1721567 | 0.1421326 | Gata3/Icmt/Larp7/Mecom/Pcif1/Rlf/Trmt5 | 7 | RTN |
| 978 | positive regulation of DNA methylation-dependent heterochromatin assembly | 1 | BP | GO:0090309 | 2/199 | 11/29008 | 0.002472642 | 0.1880986 | 0.1706657 | Dnmt1/Pphln1 | 2 | BRN |
| 1214 | regulation of DNA demethylation | 1 | BP | GO:1901535 | 1/214 | 8/29008 | 0.057523365 | 0.2260175 | 0.1866001 | Gata3 | 1 | RTN |
| 1215 | regulation of DNA methylation-dependent heterochromatin assembly | 1 | BP | GO:0090308 | 2/199 | 14/29008 | 0.004036079 | 0.1941354 | 0.1761430 | Dnmt1/Pphln1 | 2 | BRN |
| 1367 | response to ionizing radiation | 1 | BP | GO:0010212 | 4/214 | 133/29008 | 0.017146884 | 0.1659952 | 0.1370457 | Eya3/Gata3/Rad9b/Xrcc4 | 4 | RTN |
| 1373 | response to radiation | 1 | BP | GO:0009314 | 8/214 | 430/29008 | 0.014943973 | 0.1569497 | 0.1295777 | Atp8a2/Cds2/Eya3/Gata3/Grin2a/Meis2/Rad9b/Xrcc4 | 8 | RTN |
| 1384 | RNA methylation | 1 | BP | GO:0001510 | 3/214 | 70/29008 | 0.015085960 | 0.1569497 | 0.1295777 | Larp7/Pcif1/Trmt5 | 3 | RTN |
| Description | V1 | ONTOLOGY | ID | GeneRatio | BgRatio | pvalue | p.adjust | qvalue | geneID | Count | tissue |
|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <int> | <chr> | <chr> | <chr> | <chr> | <dbl> | <dbl> | <dbl> | <chr> | <int> | <chr> |
options(repr.plot.width=10, repr.plot.height=10)
ggplot(reducedTerms_Combined, aes(area = score, subgroup = parentTerm, fill = parentTerm, label = term)) +
geom_treemap(size=0.5, col='white') +
geom_treemap_text(col='gray',place="center",reflow=TRUE) +
geom_treemap_subgroup_border(col='white',size=1) +
geom_treemap_subgroup_text(col='white',place="center",fontface="bold",size=12,reflow=TRUE,grow=TRUE,min.size=1) + theme(plot.title = element_text(hjust=0.5,size=10)) +
guides(fill=F)
Warning message: “The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as of ggplot2 3.3.4.”
egoDf<-subset(as.data.frame(all),ONTOLOGY=="BP" & Count>=5 & `p.adjust`<0.25)
egoDf<-subset(egoDf,select=c("Description","geneID")) %>% mutate(geneID = strsplit(as.character(geneID), "/")) %>% unnest(geneID)
tmp<-ddply(egoDf,.(geneID),nrow)
egoDf<-merge(egoDf,tmp,by=c("geneID"))
egoDf$geneID <- reorder(egoDf$geneID, -egoDf$V1)
egoDf$Description <- reorder(egoDf$Description, egoDf$V1)
options(repr.plot.width=35, repr.plot.height=12)
ggplot(egoDf[order(egoDf$V1),], aes(x=geneID, y=Description)) +
geom_tile(fill="darkgreen") +
theme(axis.text.x = element_text(angle = -90, hjust = 0),text=element_text(size=30)) +
scale_fill_discrete(guide=F) + ylab("Biological process") + xlab("Gene")
setwd("/home/jupyter/glds202_203/Pathways")
cases<-c("IR")
for (j in 1:length(cases)){
title<-cases[j]
tmpData<-subset(as.data.frame(corMatMeth),select=c(paste0("statistic.",title),paste0("p.value.",title),"gene"))
colnames(tmpData)<-c("estimate","pvalue","gene")
tmpData<-subset(tmpData,!is.na(estimate) & estimate!=Inf & estimate!= -Inf)
#tmpData$stat<-sign(as.numeric(tmpData$estimate))* -log10(as.numeric(tmpData$pvalue))
tmpData$stat<-as.numeric(tmpData$estimate)
tmpData<-subset(tmpData,select=c("stat","gene"))
tmpData<-subset(tmpData,!is.na(stat))
rankMetric <- as.numeric(tmpData$stat)
names(rankMetric) <- tmpData$gene
rankMetric <- sort(rankMetric, decreasing = TRUE)
####GO####
gseaGO <- gseGO(geneList=rankMetric,
ont ="BP",
keyType="SYMBOL",
pvalueCutoff = 0.25,
by = "fgsea",
pAdjustMethod = "BH",
OrgDb = org.Mm.eg.db)
if(dim(gseaGO)[1]>0){
gseaRes<-data.frame(gseaGO)
gseaRes$Dataset<-"RRBS"
gseaRes$Group<-title
write.table(gseaRes,paste0("RRBS_",title,"_clusterProfiler.GO.BP.all.symbol.tsv"),sep="\t",quote=FALSE,row.names=F)
}
}
preparing geneSet collections... GSEA analysis... Warning message in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : “There are ties in the preranked stats (0.14% of the list). The order of those tied genes will be arbitrary, which may produce unexpected results.” no term enriched under specific pvalueCutoff...
min(tmpData$stat)
corMatMeth[1:3,]
| statistic.IR | p.value.IR | estimate.IR | conf.int.IR | statistic.Control | p.value.Control | estimate.Control | conf.int.Control | gene | |
|---|---|---|---|---|---|---|---|---|---|
| <list> | <list> | <list> | <list> | <list> | <list> | <list> | <list> | <chr> | |
| 1 | -0.1219599 | 0.9106417 | -0.07023966 | -0.8969236, 0.8656726 | -1.975046 | 0.186942 | -0.813058 | -0.9959165, 0.6772293 | |
| 2 | -0.5073596 | 0.6468486 | -0.281112 | -0.9321825, 0.7994241 | 0.4598038 | 0.6908018 | 0.3091982 | -0.9275152, 0.9792771 | 0610009B22Rik |
| 3 | -0.6299263 | 0.5734523 | -0.341786 | -0.9404601, 0.7738243 | -1.10546 | 0.3841465 | -0.6158535 | -0.9906097, 0.8459303 | 0610009E02Rik |
simMatrix <- calculateSimMatrix(as.data.frame(gseRRBS)$ID,
orgdb="org.Mm.eg.db",
ont="BP",
method="Rel")
scores <- setNames(-log10(as.data.frame(gseRRBS)$`p.adjust`), as.data.frame(gseRRBS)$ID)
reducedTerms_Combined <- reduceSimMatrix(simMatrix,
scores,
orgdb="org.Mm.eg.db")
preparing gene to GO mapping data... preparing IC data...
options(repr.plot.width=10, repr.plot.height=10)
ggplot(reducedTerms_Combined, aes(area = score, subgroup = parentTerm, fill = parentTerm, label = term)) +
geom_treemap(size=0.5, col='white') +
geom_treemap_text(col='gray',place="center",reflow=TRUE) +
geom_treemap_subgroup_border(col='white',size=1) +
geom_treemap_subgroup_text(col='white',place="center",fontface="bold",size=12,reflow=TRUE,grow=TRUE,min.size=1) + theme(plot.title = element_text(hjust=0.5,size=10)) +
guides(fill=F)
corTissue<-rbind(posCorRetina %>% mutate(Sign="pos"), negCorRetina %>% mutate(Sign="neg"),
posCorBrain %>% mutate(Sign="pos"), negCorBrain %>% mutate(Sign="neg"))
corAssay<-rbind(posCorMeth %>% mutate(Sign="pos"), negCorMeth %>% mutate(Sign="neg"),
posCorRna %>% mutate(Sign="pos"), negCorRna %>% mutate(Sign="neg"))
corTissue[1,]
subset(ddply(unique(subset(corTissue,Type=="IR",select=c("gene","tissue"))), .(gene), nrow),V1>1)$gene
subset(ddply(unique(subset(corTissue,Type=="Control",select=c("gene","tissue"))), .(gene), nrow),V1>1)$gene
| gene | Type | tissue | Sign | |
|---|---|---|---|---|
| <chr> | <chr> | <chr> | <chr> | |
| 1 | 1700029J07Rik | IR | RTN | pos |
diffVarRetina<-fread("/home/jupyter/glds202_203/DiffVar/RTN_IR_vs_Control.tsv",header=T,sep="\t")
sort(subset(diffVarRetina,
external_gene_name %in% c(subset(posCorRetina,Type=="IR")$gene) &
(FDR.dispersion<0.5 | FDR.mean<0.5))$external_gene_name)
tmp1<-subset(counts202,grepl("Cul1",external_gene_name))
tmp2<-subset(counts203,grepl("Cul1",external_gene_name))
options(repr.plot.width=10, repr.plot.height=8)
tmp1Long<-melt(subset(tmp1,select=-c(ensembl_gene_id))) %>%
mutate(group=ifelse(grepl("HLLC_IRC",variable),"Control",ifelse(grepl("HLLC_IR_",variable),"IR",
ifelse(grepl("HLU_IRC",variable),"HLU","Combined"))),
timepoint=ifelse(grepl("4mon",variable),"4m",ifelse(grepl("1mon",variable),"1m","7d")))
ggplot(tmp1Long) + geom_boxplot(aes(x=group,y=value,color=group)) + xlab("gene") + ylab("Normalized counts") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + facet_wrap(.~external_gene_name,scale="free") + ggtitle("Brain")
tmp2Long<-melt(subset(tmp2,select=-c(ensembl_gene_id))) %>%
mutate(group=ifelse(grepl("HLLC_IRC",variable),"Control",ifelse(grepl("HLLC_IR_",variable),"IR",
ifelse(grepl("HLU_IRC",variable),"HLU","Combined"))),
timepoint=ifelse(grepl("4mon",variable),"4m",ifelse(grepl("1mon",variable),"1m","7d")))
ggplot(tmp2Long) + geom_boxplot(aes(x=group,y=value,color=group)) + xlab("gene") + ylab("Normalized counts") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + facet_wrap(.~external_gene_name,scale="free") + ggtitle("Retina")
Warning message in melt(subset(tmp1, select = -c(ensembl_gene_id))): “The melt generic in data.table has been passed a data.frame and will attempt to redirect to the relevant reshape2 method; please note that reshape2 is deprecated, and this redirection is now deprecated as well. To continue using melt methods from reshape2 while both libraries are attached, e.g. melt.list, you can prepend the namespace like reshape2::melt(subset(tmp1, select = -c(ensembl_gene_id))). In the next version, this warning will become an error.” Using external_gene_name as id variables Warning message in melt(subset(tmp2, select = -c(ensembl_gene_id))): “The melt generic in data.table has been passed a data.frame and will attempt to redirect to the relevant reshape2 method; please note that reshape2 is deprecated, and this redirection is now deprecated as well. To continue using melt methods from reshape2 while both libraries are attached, e.g. melt.list, you can prepend the namespace like reshape2::melt(subset(tmp2, select = -c(ensembl_gene_id))). In the next version, this warning will become an error.” Using external_gene_name as id variables
diffVarRetina<-fread("/home/jupyter/glds202_203/DiffVar/RTN_IR_vs_Control.tsv",header=T,sep="\t")
diffVarRetina<-subset(diffVarRetina,select=c("external_gene_name"), FDR.dispersion<0.1)
corGeneListPos<-subset(corMatRetina, `estimate.IR`>0)$gene
corGeneListNeg<-subset(corMatRetina, `estimate.IR`<0)$gene
manifestIR<-subset(manifest,Group=="IR")
for (i in 1:dim(subset(manifestIR))[1]){
tmpMeth<-as.matrix(merged203[,paste0("ratio_",manifestIR$Retina_meth[i])])
tmpRna<-as.matrix(t(apply(merged203[,manifestIR$Retina_rna[i]],1,scale)))
tmp<-subset(merged203,select=c(subset(manifestIR)$Retina_rna[i],
paste0("ratio_",subset(manifest)$Retina_meth[i])),
external_gene_name =="Babam1")
colnames(tmp)<-c("Brain","Retina")
if(i==1){
longRna<-tmp
}
else{
longRna<-rbind(longRna,tmp)
}
}
ggplot(longRna,aes(x=Brain,y=Retina)) + geom_point(size=5) + geom_smooth(method="lm")
manifest[1,]
| Subject | Rep | Tissue | Sample | Timepoint | Group | Brain_meth | Retina_meth | Brain_rna | Retina_rna | Count | Brain_age | Retina_age |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <int> | <dbl> | <dbl> |
| M83 | Rep1 | Both | HLLC_IRC_4mon_Rep1_M83 | 4m | Control | CFG1778 | CFG1837 | Mmus_C57_6J_BRN_HLLC_IRC_4mon_Rep1_M83 | Mmus_C57_6J_RTN_HLLC_IRC_4mon_Rep1_M83 | 4 | 13.3 | 3.4 |
#Network of genes with correlation and differential expression/variability
library("STRINGdb")
string_db <- STRINGdb$new(version="11.5", species=10090,score_threshold=400,input_directory="")
diffVarBrain<-fread("/home/jupyter/glds202_203/DiffVar/BRN_IR_vs_Control.tsv",header=T,sep="\t")
diffVarBrain<-subset(diffVarBrain,select=c("external_gene_name"), FDR.dispersion<1 | FDR.mean<1)
diffVarBrain<-unique(subset(diffVarBrain,
external_gene_name %in% c(subset(posCorBrain,Type=="Control")$gene,
subset(negCorBrain,Type=="Control")$gene,
subset(posCorBrain,Type=="IR")$gene,
subset(negCorBrain,Type=="IR")$gene),
select=c("external_gene_name")))
length(diffVarBrain$external_gene_name)
cat(paste0(sort(diffVarBrain$external_gene_name),sep=","))
cat("\n")
diffVarRetina<-fread("/home/jupyter/glds202_203/DiffVar/RTN_IR_vs_Control.tsv",header=T,sep="\t")
diffVarRetina<-subset(diffVarRetina,select=c("external_gene_name"), FDR.dispersion<1 | FDR.mean<1)
diffVarRetina<-unique(subset(diffVarRetina,
external_gene_name %in% c(subset(posCorRetina,Type=="Control")$gene,
subset(negCorRetina,Type=="Control")$gene,
subset(posCorRetina,Type=="IR")$gene,
subset(negCorRetina,Type=="IR")$gene),
select=c("external_gene_name")))
length(diffVarRetina$external_gene_name)
cat(paste0(sort(diffVarRetina$external_gene_name),sep=","))
df<-merge(diffVarBrain,diffVarRetina,by=c("external_gene_name"))
colnames(df)<-c("gene")
df<-diffVarBrain
colnames(df)<-c("gene")
diffVarBrain$group<-"BRN_IR"
diffVarRetina$group<-"RTN_IR"
df<-rbind(diffVarBrain,diffVarRetina)
tmp<-subset(df, group=="BRN_IR")
tmp$color<-ifelse(tmp$external_gene_name %in% subset(negCorBrain,Type=="IR")$gene, "blue",
ifelse(tmp$external_gene_name %in% subset(posCorBrain,Type=="IR")$gene, "red", "gray"))
mapped = string_db$map(tmp, "external_gene_name",removeUnmappedRows = TRUE )
hits <- mapped$STRING_id
# post payload information to the STRING server
options(repr.plot.width=10, repr.plot.height=10)
payload_id <- string_db$post_payload(mapped$STRING_id, colors=mapped$color )
string_db$plot_network(hits, payload_id=payload_id )
clustersList <- string_db$get_clusters(mapped$STRING_id)
options(repr.plot.width=10, repr.plot.height=10)
par(mfrow=c(1,1))
for(i in seq(5:5)){
tmp<-as.data.frame(clustersList[[i]])
colnames(tmp)<-c("STRING_id")
tmp<-merge(tmp,mapped)
payload_id <- string_db$post_payload(tmp$STRING_id, colors=tmp$color )
string_db$plot_network(tmp$STRING_id, payload_id=payload_id)
}
library(pheatmap)
#corGeneList<-subset(corMatRetina, p.value.Combined<0.05 & estimate.Combined<0 & grepl("Slc",gene))$gene
corGeneList<-c("App", "Babam1", "Braf", "Cdkn1a", "Clk2", "Crb1", "Cry2", "Eif2ak4", "Fbxl17", "Grin1",
"Jun", "Mtor", "Net1", "Pde8b", "Ppp1cc", "Sdf4", "Tlk2")
#tmp1<-t(apply(subset(merged203, select=c(subset(manifest,Group=="IR")$Retina_rna), external_gene_name %in% corGeneList),1,scale))
#tmp2<-t(apply(subset(merged203, select=paste0("ratio_",c(subset(manifest,Group=="IR")$Retina_meth)), external_gene_name %in% corGeneList),1,scale))
tmp1<-subset(merged203, select=c(subset(manifest,Group=="IR")$Retina_rna), external_gene_name %in% corGeneList)
tmp2<-subset(merged203, select=paste0("ratio_",c(subset(manifest,Group=="IR")$Retina_meth)), external_gene_name %in% corGeneList)
tmpMerged<-as.data.frame(cbind(tmp1,tmp2))
colnames(tmpMerged)<-c(paste0(subset(manifest,Group=="IR")$Subject,"_",subset(manifest,Group=="IR")$Retina_rna),
paste0(subset(manifest,Group=="IR")$Subject,"_",subset(manifest,Group=="IR")$Retina_meth))
labels<-as.data.frame(colnames(tmpMerged))
colnames(labels)<-c("Sample")
labels$Subject<-c(subset(manifest,Group=="IR")$Subject,subset(manifest,Group=="IR")$Subject)
labels$Group<-ifelse(grepl("Mmus",labels$Sample),"RNA","RRBS")
labels <- labels %>% column_to_rownames(var="Sample")
tmpMerged<-tmpMerged[ , order(names(tmpMerged))]
rownames(tmpMerged)<-NULL
rownames(tmpMerged)<-paste0(subset(merged203, external_gene_name %in% corGeneList)$external_gene_name,"\n",
subset(merged203, external_gene_name %in% corGeneList)$ensembl_gene_id)
tmpMerged[, "sd"] <- apply(tmpMerged, 1, sd)
tmpMerged<-subset(tmpMerged,sd>0)
tmpMerged<-subset(tmpMerged,select= -c(sd))
dim(tmpMerged)
tmpMerged
| M75_CFG1831 | M75_Mmus_C57_6J_RTN_HLLC_IR_4mon_Rep1_M75 | M80_CFG1835 | M80_Mmus_C57_6J_RTN_HLLC_IR_4mon_Rep2_M80 | M82_CFG1836 | M82_Mmus_C57_6J_RTN_HLLC_IR_4mon_Rep3_M82 | M84_CFG1838 | M84_Mmus_C57_6J_RTN_HLLC_IR_4mon_Rep4_M84 | M89_CFG1842 | M89_Mmus_C57_6J_RTN_HLLC_IR_4mon_Rep5_M89 | |
|---|---|---|---|---|---|---|---|---|---|---|
| <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
| App ENSMUSG00000022892 | 0.89772727 | 12.632002 | 0.71356784 | 13.025015 | 0.85600000 | 12.622786 | 0.84126984 | 12.713641 | 0.85416667 | 12.723010 |
| Babam1 ENSMUSG00000031820 | 0.93162393 | 9.401982 | 0.92783505 | 9.450961 | 0.95428571 | 9.324619 | 0.92786885 | 9.457086 | 0.92376682 | 9.448115 |
| Braf ENSMUSG00000002413 | 0.05555556 | 11.354530 | 0.04000000 | 11.225228 | 0.05263158 | 11.390490 | 0.04000000 | 11.210435 | 0.02500000 | 11.194192 |
| Cdkn1a ENSMUSG00000023067 | 0.84536082 | 4.835557 | 0.78308824 | 5.555200 | 0.93362832 | 4.042031 | 0.79220779 | 4.871396 | 0.82876712 | 4.958092 |
| Clk2 ENSMUSG00000068917 | 0.90950226 | 9.410064 | 0.96815287 | 9.241980 | 0.91176471 | 9.402961 | 0.87331536 | 9.566375 | 0.97267760 | 9.211033 |
| Crb1 ENSMUSG00000063681 | 0.93333333 | 10.647286 | 0.95652174 | 10.458752 | 0.91304348 | 10.649546 | 0.90740741 | 10.679026 | 0.96153846 | 10.530957 |
| Cry2 ENSMUSG00000068742 | 0.00000000 | 10.589747 | 0.00000000 | 10.529740 | 0.00000000 | 10.551625 | 0.01123596 | 10.449733 | 0.00000000 | 10.574828 |
| Eif2ak4 ENSMUSG00000005102 | 0.51898734 | 8.900708 | 0.33875339 | 8.748190 | 0.52411576 | 8.822150 | 0.39871383 | 8.800769 | 0.26726727 | 8.687011 |
| Fbxl17 ENSMUSG00000023965 | 0.06250000 | 9.167059 | 0.05844156 | 9.070494 | 0.08227848 | 9.289378 | 0.02431611 | 9.018146 | 0.04964539 | 9.080537 |
| Grin1 ENSMUSG00000026959 | 0.75961538 | 10.706725 | 0.66189112 | 10.539602 | 0.71944444 | 10.646068 | 0.65750916 | 10.602394 | 0.73575130 | 10.713395 |
| Jun ENSMUSG00000052684 | 0.05128205 | 9.159888 | 0.04444444 | 9.113791 | 0.00000000 | 8.963003 | 0.01315789 | 8.987031 | 0.04918033 | 9.121517 |
| Mtor ENSMUSG00000028991 | 0.12500000 | 11.172914 | 0.20454545 | 11.186161 | 0.07792208 | 11.058265 | 0.10416667 | 11.080530 | 0.18965517 | 11.192819 |
| Net1 ENSMUSG00000021215 | 0.07142857 | 7.863090 | 0.10344828 | 7.842599 | 0.04838710 | 7.746317 | 0.01428571 | 7.524709 | 0.00000000 | 7.622799 |
| Pde8b ENSMUSG00000021684 | 0.20512821 | 10.745565 | 0.11174785 | 10.400834 | 0.15503876 | 10.659069 | 0.12474849 | 10.387550 | 0.13333333 | 10.423866 |
| Ppp1cc ENSMUSG00000004455 | 0.05263158 | 11.670157 | 0.04347826 | 11.773812 | 0.09756098 | 11.499555 | 0.01342282 | 11.823114 | 0.04687500 | 11.715357 |
| Sdf4 ENSMUSG00000029076 | 0.00000000 | 11.148365 | 0.09090909 | 11.072143 | 0.11111111 | 11.010371 | 0.02564103 | 11.097625 | 0.02941176 | 11.162984 |
| Tlk2 ENSMUSG00000020694 | 0.24183007 | 10.495098 | 0.41818182 | 10.320391 | 0.29411765 | 10.420339 | 0.20136054 | 10.464869 | 0.26525199 | 10.461974 |
options(repr.plot.width=6, repr.plot.height=5)
pheatmap(tmpMerged,fontsize = 14,annotation_col=labels,
show_rownames=TRUE,show_colnames=FALSE,cluster_cols=FALSE,cluster_rows=FALSE,
clustering_distance_rows = "correlation",clustering_distance_cols = "correlation",
border_color = NA, main="+ve, Combined")
corGeneList<-subset(corMatMeth, p.value.IR<0.05 & estimate.IR<0)$gene
tmp1<-t(apply(subset(mergedMeth, select=paste0("ratio_",c(subset(manifest,Group=="IR")$Brain_meth)), external_gene_name %in% corGeneList),1,scale))
tmp2<-t(apply(subset(mergedMeth, select=paste0("ratio_",c(subset(manifest,Group=="IR")$Retina_meth)), external_gene_name %in% corGeneList),1,scale))
tmpMerged<-as.data.frame(cbind(tmp1,tmp2))
colnames(tmpMerged)<-c(paste0(subset(manifest,Group=="IR")$Subject,"_",subset(manifest,Group=="IR")$Brain_meth),
paste0(subset(manifest,Group=="IR")$Subject,"_",subset(manifest,Group=="IR")$Retina_meth))
labels<-as.data.frame(colnames(tmpMerged))
colnames(labels)<-c("Sample")
labels$Subject<-c(subset(manifest,Group=="IR")$Subject,subset(manifest,Group=="IR")$Subject)
labels$Group<-c(rep("BRN",length(subset(manifest,Group=="IR")$Brain_meth)), rep("RTN",length(subset(manifest,Group=="IR")$Retina_meth)))
labels <- labels %>% column_to_rownames(var="Sample")
tmpMerged<-tmpMerged[ , order(names(tmpMerged))]
rownames(tmpMerged)<-NULL
rownames(tmpMerged)<-paste0(subset(mergedMeth, external_gene_name %in% corGeneList)$external_gene_name,"_",
subset(mergedMeth, external_gene_name %in% corGeneList)$ensembl_gene_id)
tmpMerged[, "sd"] <- apply(tmpMerged, 1, sd)
tmpMerged<-subset(tmpMerged,sd>0)
tmpMerged<-subset(tmpMerged,select= -c(sd))
dim(tmpMerged)
options(repr.plot.width=5, repr.plot.height=5)
pheatmap(tmpMerged,fontsize = 16,annotation_col=labels,
show_rownames=FALSE,show_colnames=FALSE,cluster_cols=FALSE,
clustering_distance_rows = "correlation",clustering_distance_cols = "correlation",
border_color = NA, main="-ve, IR")
simMatrix <- calculateSimMatrix(as.data.frame(oraGO_Combined)$ID,
orgdb="org.Mm.eg.db",
ont="BP",
method="Rel")
scores <- setNames(-log10(as.data.frame(oraGO_Combined)$`p.adjust`), as.data.frame(oraGO_Combined)$ID)
reducedTerms_Combined <- reduceSimMatrix(simMatrix,
scores,
orgdb="org.Mm.eg.db")
ggplot(reducedTerms_Combined, aes(area = score, subgroup = parentTerm, fill = parentTerm, label = term)) +
geom_treemap(size=0.5, col='white') + ggtitle("Combined") +
geom_treemap_text(col='gray',place="center",reflow=TRUE) +
geom_treemap_subgroup_border(col='white',size=1) +
geom_treemap_subgroup_text(col='white',place="center",fontface="bold",size=12,reflow=TRUE,grow=TRUE,min.size=1) + theme(plot.title = element_text(hjust=0.5,size=10)) +
guides(fill=F)
preparing gene to GO mapping data... preparing IC data... Warning message in calculateSimMatrix(as.data.frame(oraGO_Combined)$ID, orgdb = "org.Mm.eg.db", : “Removed 1 terms that were not found in orgdb for BP” Warning message: “`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.”